##### ################################################### ######
#####                                                     ######
#####   Input: Raw YouGov data                            ######
#####   Output: recoded variables to keep                 ######
#####                                                     ######
##### ################################################### ######

# Use the author's replication folder as the working directory when it exists.
# On any other machine the hard-coded path is absent; the script then keeps the
# current working directory, which must be the replication folder itself
# because every path below is relative ("data/...").
replication_dir <- "/Users/lotte/Dropbox/PhD/style_experiment/replication"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Replication folder not found; using working directory ", getwd())
}
# Start from an empty workspace (this also removes replication_dir).
rm(list = ls())

# Load libraries

library(data.table) # CRAN v1.14.2
library(plyr) # CRAN v1.8.6 
library(dplyr) # CRAN v1.0.9
library(tidyverse) # CRAN v1.3.1

# Load data (YouGov raw)

style_experiment <- read.csv("data/yougov_raw_results.csv")

# Create covariate dataset

# Respondent-level columns: kept as they are and repeated on every row of the
# long data.
respondent_vars <- c("ID", "age", "profile_gender", "profile_socialgrade_cie",
                     "profile_GOR", "voted_ge_2019", "pastvote_ge_2019",
                     "pastvote_EURef", "politics_scale_profile_update",
                     "political_attention", "highest_education_gce", "Weight")

# The remaining selected columns are expected to end in a set number (1-3).
# pivot_longer() splits that trailing digit off into `set` and stacks the
# columns by their stem, so the result has one row per respondent and set.
# starts_with("qstyle") also picks up the qstyle_prevalence columns.
covariates <- style_experiment %>%
  select(all_of(respondent_vars),
         starts_with("name"), starts_with("surname"), starts_with("qspeech"),
         starts_with("qpolicy"), starts_with("qstyle")) %>%
  pivot_longer(cols = -all_of(respondent_vars),
               names_to = c(".value", "set"),
               names_pattern = "(.*)([1-3])")

# Create outcomes dataset

# ID followed by all task columns. The ratings are picked by position below.
# NOTE(review): this assumes the task columns are ordered by set, with five
# ratings per set in the order of outcome_names - confirm against the raw file.
task_ratings <- style_experiment %>% select(ID, starts_with("task"))
outcome_names <- c("ID", "perceived_emotion", "perceived_aggression",
                   "perceived_evidence", "perceived_competence",
                   "perceived_likeability")

outcome1 <- setNames(task_ratings[1:6], outcome_names)
outcome1$set <- 1

outcome2 <- setNames(task_ratings[c(1, 7:11)], outcome_names)
outcome2$set <- 2

outcome3 <- setNames(task_ratings[c(1, 12:16)], outcome_names)
outcome3$set <- 3

# Stack the three sets and attach the ratings to the covariates of the same
# respondent and set.
outcomes <- bind_rows(outcome1, outcome2, outcome3)
experiment_clean <- merge(covariates, outcomes, by = c("ID", "set"))

# Create variables
# 1) Experimental manipulations 

## Style type
# Map the numeric codes to labels; any code not listed (or NA) stays NA.
experiment_clean$style <- factor(experiment_clean$qstyle,
                                 levels = c(1, 2, 3),
                                 labels = c("Emotion", "Aggression", "Evidence"))
table(experiment_clean$style)

# Levels are listed in the order Control, Treatment, Statistics, Anecdote,
# which corresponds to the raw codes 2, 4, 3, 1.
experiment_clean$style_prevalence <- factor(experiment_clean$qstyle_prevalence,
                                            levels = c(2, 4, 3, 1),
                                            labels = c("Control", "Treatment",
                                                       "Statistics", "Anecdote"))
table(experiment_clean$style_prevalence)

## Style treatment groups
# The first three factors are defined only for rows of the matching style;
# rows that match no condition stay NA. objective_style pools the three styles,
# with "Statistics" as the control condition for the evidence style.
experiment_clean <- experiment_clean %>%
  mutate(
    prevalence_evidence = factor(
      case_when(
        style == "Evidence" & style_prevalence == "Statistics" ~ "Statistics",
        style == "Evidence" & style_prevalence == "Anecdote" ~ "Anecdote"
      ),
      levels = c("Statistics", "Anecdote")
    ),
    objective_style_emotion = factor(
      case_when(
        style == "Emotion" & style_prevalence == "Control" ~ "Non-emotional",
        style == "Emotion" & style_prevalence == "Treatment" ~ "Emotional"
      ),
      levels = c("Non-emotional", "Emotional")
    ),
    objective_style_aggression = factor(
      case_when(
        style == "Aggression" & style_prevalence == "Control" ~ "Non-aggressive",
        style == "Aggression" & style_prevalence == "Treatment" ~ "Aggressive"
      ),
      levels = c("Non-aggressive", "Aggressive")
    ),
    objective_style = factor(
      case_when(
        style == "Emotion" & style_prevalence == "Control" |
          style == "Aggression" & style_prevalence == "Control" |
          style == "Evidence" & style_prevalence == "Statistics" ~ "Control",
        style == "Emotion" & style_prevalence == "Treatment" |
          style == "Aggression" & style_prevalence == "Treatment" |
          style == "Evidence" & style_prevalence == "Anecdote" ~ "Treatment"
      ),
      levels = c("Control", "Treatment")
    )
  )
table(experiment_clean$prevalence_evidence)
table(experiment_clean$objective_style_emotion)
table(experiment_clean$objective_style_aggression)
table(experiment_clean$objective_style)

## Female stereotype-congruent
# "Congruent" = emotional, non-aggressive or anecdotal speech;
# "Incongruent" = non-emotional, aggressive or statistical speech.
## Categorical styles and treatment groups for pooled analysis
# style_categorical has one shared "Control" level (any style with prevalence
# "Control") plus one level per treatment.
experiment_clean <- experiment_clean %>%
  mutate(
    female_stereotype_congruent = factor(
      case_when(
        style == "Emotion" & style_prevalence == "Control" |
          style == "Aggression" & style_prevalence == "Treatment" |
          style == "Evidence" & style_prevalence == "Statistics" ~ "Incongruent",
        style == "Emotion" & style_prevalence == "Treatment" |
          style == "Aggression" & style_prevalence == "Control" |
          style == "Evidence" & style_prevalence == "Anecdote" ~ "Congruent"
      ),
      levels = c("Incongruent", "Congruent")
    ),
    style_categorical = factor(
      case_when(
        style_prevalence == "Control" ~ "Control",
        style_prevalence == "Treatment" & style == "Emotion" ~ "Emotion",
        style_prevalence == "Treatment" & style == "Aggression" ~ "Aggression",
        style_prevalence == "Statistics" & style == "Evidence" ~ "Statistics",
        style_prevalence == "Anecdote" & style == "Evidence" ~ "Anecdote"
      ),
      levels = c("Control", "Emotion", "Aggression", "Statistics", "Anecdote")
    )
  )
table(experiment_clean$female_stereotype_congruent)
table(experiment_clean$style_categorical)

## Policy areas
# Map the numeric policy codes to labels; any other code (or NA) stays NA.
experiment_clean$policy <- factor(experiment_clean$qpolicy,
                                  levels = c(1, 2, 3),
                                  labels = c("Transport", "Housing", "Health"))
table(experiment_clean$policy)

# The policy indicators are stored as the strings "TRUE"/"FALSE", not as
# logicals.
## MP gender
# The three listed first names are coded "Man" and every other non-missing
# name "Woman".
experiment_clean <- experiment_clean %>%
  mutate(
    policy_housing = ifelse(policy == "Housing", "TRUE", "FALSE"),
    policy_health = ifelse(policy == "Health", "TRUE", "FALSE"),
    policy_transport = ifelse(policy == "Transport", "TRUE", "FALSE"),
    mp_gender = ifelse(name == "Jack" | name == "Peter" | name == "Adam",
                       "Man", "Woman")
  )

# 2) Respondent characteristics

## Voted in 2019 General Election, party voted for, Brexit vote, gender and
## social grade
# Each recode gives code 1 (codes 1-3 for social grade) the first label and
# every other non-missing code the second label; NA stays NA.
# voted_2019 is stored as the strings "TRUE"/"FALSE", not as a logical.
# NOTE(review): any party code other than 1 becomes "Labour" and any EU
# referendum code other than 1 becomes "Leave" - confirm against the codebook
# that these variables only take two values in this sample.
experiment_clean <- experiment_clean %>%
  mutate(
    voted_2019 = ifelse(voted_ge_2019 == 1, "TRUE", "FALSE"),
    party_voted_for = ifelse(pastvote_ge_2019 == 1, "Conservative", "Labour"),
    brexit_vote = ifelse(pastvote_EURef == 1, "Remain", "Leave"),
    respondent_gender = ifelse(profile_gender == 1, "Man", "Woman"),
    social_grade = ifelse(profile_socialgrade_cie == 1 |
                            profile_socialgrade_cie == 2 |
                            profile_socialgrade_cie == 3,
                          "ABC1", "C2DE")
  )

## Age
# Bin age into three bands. cut() intervals are right-closed, so the bands are
# (17, 34], (34, 54] and (54, Inf]; ages of 17 or below become NA.
# The open upper bound keeps respondents older than 96 in "55+" instead of
# silently turning them into NA.
experiment_clean$age_1 <- cut(experiment_clean$age, breaks = c(17, 34, 54, Inf),
                              labels = c("18-34", "35-54", "55+"))
# cut() already returns a factor with exactly these three levels, so it can be
# used directly as age_factor.
experiment_clean$age_factor <- experiment_clean$age_1
table(experiment_clean$age_factor)

## Education
# Code 6 of highest_education_gce is labelled "TRUE", every other non-missing
# code "FALSE"; the strings are then converted to a factor.
# NOTE(review): presumably code 6 is the only degree-level category - confirm
# against the codebook.
experiment_clean$degree_educated <- ifelse(experiment_clean$highest_education_gce == 6, "TRUE", "FALSE")
table(experiment_clean$degree_educated)
experiment_clean$degree_educated <- as.factor(experiment_clean$degree_educated)

## Remove "don't knows"
# Code 8 on the left-right scale and code 6 on the five perception ratings are
# set to NA. across() replaces the superseded mutate_at(); mutate() evaluates
# its arguments in order, so left_right_placement copies the cleaned scale.
experiment_clean <- experiment_clean %>%
  mutate(
    across(politics_scale_profile_update, ~ na_if(.x, 8)),
    left_right_placement = politics_scale_profile_update,
    across(c(perceived_emotion, perceived_aggression, perceived_evidence,
             perceived_competence, perceived_likeability),
           ~ na_if(.x, 6)),
    speech_number = qspeech
  )

# Drop variables that are no longer needed
# Keep every column except the raw inputs and helper columns listed here.
experiment_clean <- experiment_clean[setdiff(
  names(experiment_clean),
  c("profile_gender", "profile_socialgrade_cie", "profile_GOR",
    "voted_ge_2019", "pastvote_EURef", "pastvote_ge_2019",
    "qpolicy", "qstyle", "age_1", "qstyle_prevalence",
    "highest_education_gce", "politics_scale_profile_update", "qspeech")
)]


# Subset and save data
# which() keeps only the rows where the comparison is TRUE. Indexing with the
# bare logical vector would add an all-NA row to each subset for every row
# whose style is NA.
emotion <- experiment_clean[which(experiment_clean$style == "Emotion"), ]
aggression <- experiment_clean[which(experiment_clean$style == "Aggression"), ]
evidence <- experiment_clean[which(experiment_clean$style == "Evidence"), ]

# One file per style, each holding a single data frame.
save(emotion, file = "data/emotion_data.Rdata")
save(aggression, file = "data/aggression_data.Rdata")
save(evidence, file = "data/evidence_data.Rdata")

# The full cleaned data set is saved under the object name style_data.
style_data <- experiment_clean
save(style_data, file = "data/style_data.Rdata")
